\[\\[.5in]\]

Retrieving Forecast Data

Every week, forecasters submit their predictions to COVID-19 ForecastHub. An AWS bucket that contains the estimates of a handful of signals (e.g., COVID death, cases, hospitalization, etc). Furthermore, the AWS server stores an array of evaluation metrics of these forecasts (e.g., Absolute Error, Weighted Interval Score). Alternatively, the data can be retrieved from covidcast and covideval APIs.

To promote the flexibility to replicate the report, the data used in this report can be easily downloaded as a CSV file. By doing so, the user can generate customized plots or even include their own forecaster.

aheads  <- as.numeric(params$weeks)
url_deaths <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_state_deaths.rds"
download.file(url_deaths, "eval_deaths.RDS") # download to disk
scores <- readRDS(paste0(here(), "/eval_deaths.RDS"))
scores <- subset(scores, forecaster %in% params$forecasters)
primary_forecaster <- params$primary_forecaster

our_pred_dates <- 
  scores %>%
  filter(forecaster == "CMU-TimeSeries")

our_pred_dates <- unique(our_pred_dates$forecast_date) 
n_dates <- length(our_pred_dates)
forecast_dates <- our_pred_dates[n_dates- 2 *5:2]

scores$forecast_date <- 
  if_else(scores$forecaster %in% c("Karlen-pypm", "CU-select"), scores$forecast_date + 1, scores$forecast_date)

scores <- subset(scores, forecast_date %in% forecast_dates & ahead <= aheads)
results <- intersect_averagers(scores, c("forecaster"), c("forecast_date", "geo_value")) %>%
  select(c("ahead", "geo_value", "forecaster","forecast_date", "data_source", "signal","target_end_date","incidence_period","actual","wis","ae","cov_80"))

results %>%
  group_by(forecast_date) %>%
  summarise(n_distinct(geo_value))
results %>%
  download_this(
    output_name = "results",
    output_extension = ".csv",
    button_label = "Download Predictions Evaluation",
    button_type = "success",
    has_icon = TRUE,
    csv2 = FALSE,
    icon = "fa fa-save"
  )

Overall AE, WIS, Coverage 80

NOTE: Results are based on the following numbers of common locations

Overall Absolute Error

By Weeks Ahead

weeks.label = c("1 week ahead", "2 weeks ahead", "3 weeks ahead", "4 weeks ahead")
names(weeks.label) = c(1, 2, 3, 4)

subtitle = sprintf("Forecasts made over %s to %s",
                   format(min(forecast_dates), "%B %d, %Y"),
                   format(max(forecast_dates), "%B %d, %Y"))

plot_ae <-
  plot_canonical(results, 
                 x = "ahead", 
                 y = "ae", 
                 aggr = mean) +
  labs(title = subtitle, x= "Weeks Ahead", y = "Mean AE",color='Forecasters') +  
#  geom_line(aes(linetype=forecaster, color=forecaster)) +
  geom_point(aes(color=forecaster, text=sprintf("Weeks Ahead: %s<br>Average Error: %s <br>Forecaster: %s", 
                              ahead, 
                              round(ae, digits=2),
                              color)),
             alpha = 0.05) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_y_log10()

if (params$colorblind_palette) {
  plot_ae <- plot_ae + 
    scale_color_viridis_d()
}

ggplotly(plot_ae, tooltip="text", width=1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

Overall Weighted Interval Score

By Forecast Dates

plot_wis <-
  plot_canonical(results, 
                 x = "forecast_date", 
                 y = "wis", 
                 aggr = mean,
                 grp_vars = c("forecaster","ahead"), 
                 facet_rows = "ahead") + 
  labs(title = subtitle, 
       x = "Forecast Dates", 
       y = "Mean WIS",
       color = "Forecasters") +
  geom_point(aes(text=sprintf("Forecast Date: %s<br>Mean WIS: %s <br>Forecaster: %s", 
                              forecast_date, 
                              round(wis, digits = 2),
                              color)),
             alpha = 0.05) +
  facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead=weeks.label)) +
  theme(strip.background = element_rect(fill = "white")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = "center")) + 
  scale_y_log10()

if (params$colorblind_palette) {
  plot_wis <- plot_wis + 
    scale_color_viridis_d()
}

ggplotly(plot_wis, tooltip="text", height=800, width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

Overall Coverage 80

By Forecast Dates

plot_cov80 <-
  plot_canonical(results, 
                 x = "forecast_date", 
                 y = "cov_80", 
                 aggr = mean,
                 grp_vars = c("forecaster","ahead"), 
                 facet_rows = "ahead") +
  labs(title = subtitle, x= "Forecast date", y = "Mean Coverage 80", color='Forecasters') +
  geom_point(aes(text = sprintf("Forecast Date: %s<br>Coverage: %s <br>Forecaster: %s", 
                                forecast_date, 
                                round(cov_80, digits = 2),
                                color)),
             alpha = 0.05) +
  facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
  theme(strip.background = element_rect(fill = "white")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = "center")) +
  geom_line(mapping = aes(y = .8), )

if (params$colorblind_palette) {
  plot_cov80 <- plot_cov80 + 
    scale_color_viridis_d()
}

ggplotly(plot_cov80, tooltip="text", height=800, width=1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

Mean Geometric Relative WIS

Relative to baseline; scale first then take the geometric mean, ignoring a few 0’s.

By Weeks Ahead

geom_mean <- function(x) prod(x)^(1/length(x))
#geom_mean <- exp(mean(log((x+1)/(y+1)))) #still need to figure this out

mean_wis <- 
  plot_canonical(results %>% 
                   filter(wis > 0), 
                 x = "ahead", 
                 y = "wis", 
                 aggr = geom_mean,
                 base_forecaster = "COVIDhub-baseline", 
                 scale_before_aggr = TRUE) + 
  labs(title = subtitle, 
       x = "Weeks Ahead", 
       y = "Geometric mean relative WIS", 
       color = "Forecasters") +
  geom_point(aes(text = sprintf("Weeks Ahead: %s<br>WIS: %s <br>Forecaster: %s", 
                                ahead, 
                                round(wis, digits = 2),
                                color)),
             alpha = 0.05) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = "center")) +
  geom_line(mapping = aes(y = 1))

if (params$colorblind_palette) {
  mean_wis <- mean_wis + 
    scale_color_viridis_d()
}

ggplotly(mean_wis, tooltip="text", width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

By Forecast Dates

mean_wis_forecast_date <- 
plot_canonical(results %>% 
                 filter(wis > 0), 
               x = "forecast_date", 
               y = "wis", 
               aggr = geom_mean, 
               facet_rows = "ahead",
               grp_vars = c("forecaster", "ahead"),
               base_forecaster = "COVIDhub-baseline", 
               scale_before_aggr = TRUE) +
  theme(legend.position = "bottom") + 
  labs(title = subtitle, 
       x = "Forecast date", 
       y = "Geometric mean relative WIS", 
       color = "Forecasters") +
  geom_point(aes(text = sprintf("Forecast Date: %s<br>WIS: %s <br>Forecaster: %s", 
                                forecast_date, 
                                round(wis, digits = 2),
                                color)),
             alpha = 0.05) +
  facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
  theme(strip.background = element_rect(fill = "white")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = "center")) +
  geom_line(mapping = aes(y = 1))

if (params$colorblind_palette) {
  mean_wis_forecast_date <- mean_wis_forecast_date + 
    scale_color_viridis_d()
}

ggplotly(mean_wis_forecast_date, tooltip = "text", height=800, width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

Maps

mean score over forecast dates and aheads Note that the results are scaled by population.

library(sf)
maps <- results %>%
  group_by(geo_value, forecaster) %>%
  summarise(across(wis:cov_80, mean)) %>%
  left_join(animalia::state_population, by = "geo_value") %>%
  mutate(across(wis:cov_80, ~ .x / population * 1e5)) %>%
  pivot_longer(wis:cov_80, names_to = "score") %>%
  group_by(score) %>%
  mutate(time_value = Sys.Date(),
         r = max(value)) %>%
  group_by(forecaster, .add = TRUE) %>%
  group_split()

# for county prediction, set geo_type = "county"
maps <- purrr::map(maps, 
                   ~as.covidcast_signal(
                     .x, signal = .x$score[1], 
                     data_source = .x$forecaster[1], 
                     geo_type = "state"))

maps <- purrr::map(maps,
                   ~plot(.x, 
                         choro_col = scales::viridis_pal()(3),
                         range = c(0,.x$r[1])))

nfcasts <- length(unique(results$forecaster))

Mean AE

# original code
cowplot::plot_grid(plotlist = maps[1:nfcasts], ncol = 3)

Mean Coverage 80

cowplot::plot_grid(plotlist = maps[(nfcasts+1):(nfcasts*2)], ncol = 3)

Mean WIS

cowplot::plot_grid(plotlist = maps[((nfcasts*2)+1):length(maps)], ncol = 3)

Trajectory plots

# options(timeout=300)
# url4 <- "https://forecast-eval.s3.us-east-2.amazonaws.com/predictions_cards.rds"
# download.file(url4, "predictions.RDS") # download to disk
# state_predictions <- readRDS(paste0(here(), "/predictions.RDS"))
# 
# 
# # for county prediction, set geo_type = "county"
# state.label = c(state.name, "Washington D.C.", "Porto Rico")
# names(state.label) = c(tolower(state.abb), "dc", "pr")
# 
# # trajectory plots for selected forecaster
# pd <- evalcast:::setup_plot_trajectory(
#   state_predictions %>% filter(forecaster=="CMU-TimeSeries" & forecast_date %in% forecast_dates),
#   intervals = 0.8,
#   geo_type = "state",
#   start_day = min(forecast_dates) - 60)
# 
# g <- ggplot(pd$truth_df, mapping = aes(x = target_end_date))
# # build the fan
# g <- g + geom_ribbon(
#   data = pd$quantiles_df %>%
#     mutate(upper = round(upper, 2),
#            lower = round(lower, 2)),
#   mapping = aes(ymin = lower, 
#                 ymax = upper, 
#                 fill = forecaster,
#                 group = interaction(forecaster, forecast_date)),
#   alpha = .1) +
#   scale_fill_viridis_d(begin=.15, end=.85)
# 
# # line layer
# g <- g +
#   #geom_line(aes(y = .data$value.y), color = "#3182BD") + # corrected
#   geom_line(aes(y = value), size = .5) + # reported
#   geom_line(data = pd$points_df,
#             mapping = aes(y = value,
#                           color = forecaster, 
#                           group = interaction(forecaster, forecast_date)), 
#             size = .5) +
#   geom_point(aes(y = value), size = 1) + # reported gets dots
#   geom_point(data = pd$points_df %>%
#                mutate(value = round(value, 2)),
#              mapping = aes(y = value, color = forecaster),
#              size = 1) +
#   scale_color_viridis_d(begin=.15, end=.85)
# 
# 
# states <- g  +
#   facet_wrap(~geo_value, 
#              scales = "free_y", 
#              ncol = 3, 
#              labeller = labeller(geo_value = state.label)) +
#   labs(x = "", y = "") +
#   theme_bw() + 
#   theme(legend.position = "none", strip.background = element_rect(fill = "white"))
# 
# ggplotly(states, height=5000, width= 1000)